#!/bin/bash

## Compute the user geographic distribution for the specified date /user/hive/warehouse/pass_data.db/user_login/dt=20150601

source ~/.bashrc

## Resolve the date to run for.
## Exactly one argument is required; it is normalized to YYYYMMDD with
## `date -d` and stored in cur_date. The script exits with status 1 when the
## argument is missing or cannot be parsed as a date.
declare cur_date
if [[ $# -ne 1 ]];then
	echo "usage:sh run_erisedMR.sh 20160601	:date is must!"
	exit 1
fi
# The assignment is kept separate from `declare` so the tested status is the
# one returned by `date`, not by the declaration.
if ! cur_date=$(date +"%Y%m%d" -d "$1");then
	echo "usage:sh run_erisedMR.sh 20160601 	:date is invalid!"
	# Abort here: continuing would leave cur_date empty and every path built
	# from it (input partition, output directories) would be malformed.
	exit 1
fi
echo "==========sh run_erisedMR.sh $cur_date==========="

## Refuse to continue without a run date: every path below embeds cur_date,
## and the output directories built from it are removed recursively.
: "${cur_date:?cur_date is not set}"

## Input partitions: user_login for the run date, plus the erised data.
hdfsPath=/user/hive/warehouse/pass_data.db/user_login/dt=$cur_date
## This path has to be updated periodically.
erisedPath=/user/hive/warehouse/erised_user_profile/event_day=20150611/id_type=userid

## MapReduce computation
output1=/tmp/erised/$cur_date/1
output2=/tmp/erised/$cur_date/2
# NOTE(review): localFile is not used in the visible part of the script; kept
# in case later code relies on it.
localFile=/home/hadoop/udm/erised_$cur_date

# Remove the output directories of any previous run. The exit status is
# deliberately not checked (presumably because they may not exist yet).
hdfs dfs -rm -r "$output1"
hdfs dfs -rm -r "$output2"

if ! hadoop jar udm-1.0-jar-with-dependencies.jar com.baidu.udm.mr.UserModel_Erised \
	--input "$hdfsPath,$erisedPath" \
	--output1 "$output1" \
	--output2 "$output2" \
	--dt "$cur_date";then
	# Report both output directories; there is no single "output" variable.
	echo "run mr failed! error:input=$hdfsPath,$erisedPath output=$output1,$output2"
	exit 1
fi
